library(devtools) library(rbenchmark) library(microbenchmark)
# Directory that is scanned for the JSON report files loaded further down.
path <- "~/clase/proyecto r/ProyectoVirusTotal/Android"
Tal y como se ha explicado en el anterior capítulo de clasificación, cargamos los datos en una lista con la información de los JSON. Como vamos a trabajar con los permisos, necesitamos que estén presentes en el dataset posteriormente; sin embargo, la función `spread_all` pone a NULL los elementos que son múltiples y, en este caso, cada permiso se compone de 3 elementos. Por eso los tenemos que corregir antes de pasarlos a dataset; una vez hecho eso, volvemos a usar `spread_all` para tenerlos en un dataframe.
# Load every JSON report found under `path` and flatten the result into one
# table (`json_tabla_permisos_cambiados`).

# `dir()` / `list.files()` interpret `pattern` as a regular expression, not as
# a shell glob, so the extension is matched with an anchored regex.
json_pattern <- "\\.json$"
# Bare file names; not used again in the visible part of the script.
files <- dir(path, pattern = json_pattern)
json_files <- list.files(path = path, pattern = json_pattern, full.names = TRUE)

# Parse the files in parallel, leaving one core free but never requesting
# fewer than one worker (detectCores() can return 1 or NA).
n_workers <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(n_workers)
json_list <- tryCatch(
  parLapply(
    cl,
    json_files,
    function(x) jsonlite::read_json(path = x, simplifyVector = TRUE)
  ),
  # Shut the workers down even when one of the files fails to parse.
  finally = stopCluster(cl)
)

# Every entry under additional_info > androguard > Permissions is replaced by
# a single TRUE, so that spread_all() keeps one column per permission instead
# of dropping the multi-valued entries.
json_list <- lapply(json_list, function(informe) {
  permisos <- informe[["additional_info"]][["androguard"]][["Permissions"]]
  if (length(permisos) > 0) {
    permisos[] <- TRUE
    informe[["additional_info"]][["androguard"]][["Permissions"]] <- permisos
  }
  informe
})

json_tabla_permisos_cambiados <- json_list %>%
  spread_all()
Filtramos la columna sha256, que es lo que usaremos para identificar cada archivo, aunque, por comodidad al ver los gráficos posteriores, en las visualizaciones seguiremos identificando cada archivo por su posición. Posteriormente seleccionamos todos los permisos y les asignamos 0 a todos los NA, ya que en este caso tener un NA implica que no tiene ese permiso.
# Keep the file identifier, the two scan counters and every permission column.
sha_datos_permisos_positives <- json_tabla_permisos_cambiados %>%
  as.data.frame() %>%
  plotly::select(
    sha256, total, positives,
    matches("androguard.Permissions.android.permission.*")
  )

# A missing value means the file does not declare that permission: store 0.
sha_datos_permisos_positives <- replace(
  sha_datos_permisos_positives,
  is.na(sha_datos_permisos_positives),
  0
)

# Drop the long JSON path prefix so that columns carry the bare permission name.
names(sha_datos_permisos_positives) <- gsub(
  "additional_info.androguard.Permissions.android.permission.",
  "",
  names(sha_datos_permisos_positives)
)
Primero probamos con una regresión lineal, con la que obtenemos un R-squared de 0.6469. Sin embargo, tenemos demasiadas variables en la regresión lineal, así que las vamos a filtrar usando stepAIC; así pasamos de 96 permisos a los 23 permisos que son más relevantes de cara a que den positivo los tests.
# Linear model of `positives` on every remaining column of the table;
# `total` and `sha256` are explicitly removed from the predictors.
permisos.regresion <- lm(formula = positives ~ . -total-sha256, data = sha_datos_permisos_positives )
# Standard diagnostic plots for the fitted model.
plot(permisos.regresion)
## Warning: not plotting observations with leverage one:
## 4, 25, 31, 34, 40, 41, 45, 56, 65, 68, 75, 78, 85, 103, 124, 136, 152, 181
# Coefficient table and goodness-of-fit figures for the full model.
summary(permisos.regresion)
##
## Call:
## lm(formula = positives ~ . - total - sha256, data = sha_datos_permisos_positives)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.109 -1.043 0.000 1.500 5.891
##
## Coefficients: (60 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.1087 0.3917 56.438 < 2e-16 ***
## SYSTEM_ALERT_WINDOW -0.8869 2.6578 -0.334 0.739099
## ACCESS_NETWORK_STATE -3.7415 4.4829 -0.835 0.405302
## ACCESS_COARSE_LOCATION 5.4804 3.5143 1.559 0.121049
## WAKE_LOCK -11.0668 3.6681 -3.017 0.003012 **
## INTERNET 8.6567 4.2323 2.045 0.042610 *
## WRITE_EXTERNAL_STORAGE -0.9811 1.4137 -0.694 0.488780
## RECEIVE_BOOT_COMPLETED -3.0559 4.2219 -0.724 0.470332
## QUICKBOOT_POWERON 15.8335 47.2107 0.335 0.737820
## ACCESS_WIFI_STATE 7.3468 3.1571 2.327 0.021336 *
## GET_TASKS 12.8226 7.5549 1.697 0.091778 .
## CHANGE_WIFI_STATE -5.2993 20.6905 -0.256 0.798215
## READ_PHONE_STATE -10.0000 4.2848 -2.334 0.020967 *
## BLUETOOTH -12.5924 30.1403 -0.418 0.676713
## REQUEST_IGNORE_BATTERY_OPTIMIZATIONS 11.6521 8.7418 1.333 0.184636
## REQUEST_DELETE_PACKAGES -18.3770 30.1950 -0.609 0.543728
## QUERY_ALL_PACKAGES 19.8424 59.6554 0.333 0.739900
## SEND_SMS 14.9571 4.1688 3.588 0.000454 ***
## KILL_BACKGROUND_PROCESSES -19.5711 19.4014 -1.009 0.314765
## WRITE_SMS -13.5012 51.8686 -0.260 0.795003
## CALL_PHONE 3.0826 5.3819 0.573 0.567682
## VIBRATE 2.6961 4.5887 0.588 0.557744
## RECEIVE_SMS 15.7169 31.5914 0.498 0.619580
## READ_CONTACTS -4.2612 28.1387 -0.151 0.879841
## FOREGROUND_SERVICE -3.2051 5.1995 -0.616 0.538571
## READ_SMS 3.0112 27.2689 0.110 0.912223
## READ_PHONE_NUMBERS -6.2717 20.8237 -0.301 0.763705
## GET_ACCOUNTS 39.7731 36.5563 1.088 0.278389
## AUTHENTICATE_ACCOUNTS -77.1361 18.2014 -4.238 3.98e-05 ***
## RECEIVE_MMS 37.4320 31.6962 1.181 0.239538
## REORDER_TASKS -12.3745 20.5625 -0.602 0.548240
## WRITE_SYNC_SETTINGS NA NA NA NA
## USE_FULL_SCREEN_INTENT 30.6103 13.1541 2.327 0.021337 *
## CHANGE_NETWORK_STATE 3.9825 41.8555 0.095 0.924327
## ACCESS_COARSE_UPDATES NA NA NA NA
## PROCESS_OUTGOING_CALLS NA NA NA NA
## BLUETOOTH_ADMIN NA NA NA NA
## ACCESS_FINE_LOCATION -11.9546 5.4484 -2.194 0.029805 *
## BOOT_COMPLETED NA NA NA NA
## REQUEST_INSTALL_PACKAGES -16.2227 9.2944 -1.745 0.083014 .
## RECEIVE_USER_PRESENT NA NA NA NA
## ACCESS_LOCATION_EXTRA_COMMANDS NA NA NA NA
## WRITE_CALL_LOG NA NA NA NA
## READ_CALL_LOG NA NA NA NA
## ACCESS_BACKGROUND_LOCATION NA NA NA NA
## WRITE_CONTACTS NA NA NA NA
## READ_EXTERNAL_STORAGE -1.7843 2.5368 -0.703 0.482939
## BROADCAST_STICKY NA NA NA NA
## MODIFY_AUDIO_SETTINGS NA NA NA NA
## SYSTEM_OVERLAY_WINDOW 23.4279 7.8390 2.989 0.003289 **
## RECORD_AUDIO NA NA NA NA
## MOUNT_UNMOUNT_FILESYSTEMS NA NA NA NA
## ANSWER_PHONE_CALLS NA NA NA NA
## POWER_SERVICE NA NA NA NA
## DISABLE_KEYGUARD NA NA NA NA
## ACCESS_MOCK_LOCATION NA NA NA NA
## READ_USER_DICTIONARY NA NA NA NA
## INTERACT_ACROSS_USERS_FULL NA NA NA NA
## CHANGE_WIFI_MULTICAST_STATE NA NA NA NA
## READ_INTERNAL_STORAGE NA NA NA NA
## DOWNLOAD_WITHOUT_NOTIFICATION NA NA NA NA
## ACCESS_CACHE_FILESYSTEM NA NA NA NA
## ACCESS_MTK_MMHW NA NA NA NA
## DIAGNOSTIC NA NA NA NA
## WRITE_SETTINGS NA NA NA NA
## SAMSUNG_TUNTAP NA NA NA NA
## WRITE_SECURE_SETTINGS NA NA NA NA
## PACKAGE_USAGE_STATS NA NA NA NA
## WRITE_INTERNAL_STORAGE NA NA NA NA
## READ_LOGS NA NA NA NA
## READ_PRIVILEGED_PHONE_STATE NA NA NA NA
## READ_CALENDAR NA NA NA NA
## WRITE_CALENDAR NA NA NA NA
## INJECT_EVENTS NA NA NA NA
## ACCESS_SUPERUSER NA NA NA NA
## ACCESS_NOTIFICATION_POLICY NA NA NA NA
## CAMERA NA NA NA NA
## USER_PRESENT NA NA NA NA
## SET_WALLPAPER NA NA NA NA
## INSTALL_PACKAGES NA NA NA NA
## DELETE_PACKAGES NA NA NA NA
## RESTART_PACKAGES NA NA NA NA
## CHANGE_CONFIGURATION NA NA NA NA
## SET_WALLPAPER_HINTS NA NA NA NA
## BROADCAST_PACKAGE_CHANGED NA NA NA NA
## BROADCAST_PACKAGE_REPLACED NA NA NA NA
## BROADCAST_PACKAGE_INSTALL NA NA NA NA
## FLASHLIGHT NA NA NA NA
## BROADCAST_PACKAGE_ADDED NA NA NA NA
## READ_APP_BADGE NA NA NA NA
## USES_POLICY_WIPE_DATA NA NA NA NA
## BIND_DEVICE_ADMIN NA NA NA NA
## SDCARD_WRITE NA NA NA NA
## START_ACTIVITIES_FROM_BACKGROUND NA NA NA NA
## MANAGE_OWN_CALLS NA NA NA NA
## MANAGE_EXTERNAL_STORAGE NA NA NA NA
## BIND_CALL_REDIRECTION_SERVICE NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.03 on 146 degrees of freedom
## Multiple R-squared: 0.6469, Adjusted R-squared: 0.5598
## F-statistic: 7.429 on 36 and 146 DF, p-value: < 2.2e-16
Ejecuto esta función por separado para poder evitar mostrar la salida
# Stepwise term selection in both directions, starting from the full model.
# NOTE(review): stepAIC prints every step while it runs; passing `trace = FALSE`
# would presumably silence that output without a separate chunk - confirm.
permisos.regresion_filtrados <- stepAIC(permisos.regresion, direction="both")
# Table of the terms removed/added at each step, with the resulting AIC.
permisos.regresion_filtrados$anova
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## positives ~ (sha256 + total + SYSTEM_ALERT_WINDOW + ACCESS_NETWORK_STATE +
## ACCESS_COARSE_LOCATION + WAKE_LOCK + INTERNET + WRITE_EXTERNAL_STORAGE +
## RECEIVE_BOOT_COMPLETED + QUICKBOOT_POWERON + ACCESS_WIFI_STATE +
## GET_TASKS + CHANGE_WIFI_STATE + READ_PHONE_STATE + BLUETOOTH +
## REQUEST_IGNORE_BATTERY_OPTIMIZATIONS + REQUEST_DELETE_PACKAGES +
## QUERY_ALL_PACKAGES + SEND_SMS + KILL_BACKGROUND_PROCESSES +
## WRITE_SMS + CALL_PHONE + VIBRATE + RECEIVE_SMS + READ_CONTACTS +
## FOREGROUND_SERVICE + READ_SMS + READ_PHONE_NUMBERS + GET_ACCOUNTS +
## AUTHENTICATE_ACCOUNTS + RECEIVE_MMS + REORDER_TASKS + WRITE_SYNC_SETTINGS +
## USE_FULL_SCREEN_INTENT + CHANGE_NETWORK_STATE + ACCESS_COARSE_UPDATES +
## PROCESS_OUTGOING_CALLS + BLUETOOTH_ADMIN + ACCESS_FINE_LOCATION +
## BOOT_COMPLETED + REQUEST_INSTALL_PACKAGES + RECEIVE_USER_PRESENT +
## ACCESS_LOCATION_EXTRA_COMMANDS + WRITE_CALL_LOG + READ_CALL_LOG +
## ACCESS_BACKGROUND_LOCATION + WRITE_CONTACTS + READ_EXTERNAL_STORAGE +
## BROADCAST_STICKY + MODIFY_AUDIO_SETTINGS + SYSTEM_OVERLAY_WINDOW +
## RECORD_AUDIO + MOUNT_UNMOUNT_FILESYSTEMS + ANSWER_PHONE_CALLS +
## POWER_SERVICE + DISABLE_KEYGUARD + ACCESS_MOCK_LOCATION +
## READ_USER_DICTIONARY + INTERACT_ACROSS_USERS_FULL + CHANGE_WIFI_MULTICAST_STATE +
## READ_INTERNAL_STORAGE + DOWNLOAD_WITHOUT_NOTIFICATION + ACCESS_CACHE_FILESYSTEM +
## ACCESS_MTK_MMHW + DIAGNOSTIC + WRITE_SETTINGS + SAMSUNG_TUNTAP +
## WRITE_SECURE_SETTINGS + PACKAGE_USAGE_STATS + WRITE_INTERNAL_STORAGE +
## READ_LOGS + READ_PRIVILEGED_PHONE_STATE + READ_CALENDAR +
## WRITE_CALENDAR + INJECT_EVENTS + ACCESS_SUPERUSER + ACCESS_NOTIFICATION_POLICY +
## CAMERA + USER_PRESENT + SET_WALLPAPER + INSTALL_PACKAGES +
## DELETE_PACKAGES + RESTART_PACKAGES + CHANGE_CONFIGURATION +
## SET_WALLPAPER_HINTS + BROADCAST_PACKAGE_CHANGED + BROADCAST_PACKAGE_REPLACED +
## BROADCAST_PACKAGE_INSTALL + FLASHLIGHT + BROADCAST_PACKAGE_ADDED +
## READ_APP_BADGE + USES_POLICY_WIPE_DATA + BIND_DEVICE_ADMIN +
## SDCARD_WRITE + START_ACTIVITIES_FROM_BACKGROUND + MANAGE_OWN_CALLS +
## MANAGE_EXTERNAL_STORAGE + BIND_CALL_REDIRECTION_SERVICE) -
## total - sha256
##
## Final Model:
## positives ~ WAKE_LOCK + INTERNET + QUICKBOOT_POWERON + ACCESS_WIFI_STATE +
## GET_TASKS + READ_PHONE_STATE + BLUETOOTH + QUERY_ALL_PACKAGES +
## SEND_SMS + KILL_BACKGROUND_PROCESSES + WRITE_SMS + RECEIVE_SMS +
## FOREGROUND_SERVICE + GET_ACCOUNTS + AUTHENTICATE_ACCOUNTS +
## RECEIVE_MMS + REORDER_TASKS + USE_FULL_SCREEN_INTENT + ACCESS_FINE_LOCATION +
## REQUEST_INSTALL_PACKAGES + SYSTEM_OVERLAY_WINDOW + READ_CALENDAR +
## WRITE_CONTACTS
##
##
## Step Df Deviance Resid. Df Resid. Dev
## 1 146 1340.216
## 2 - BIND_CALL_REDIRECTION_SERVICE 0 0.00000000 146 1340.216
## 3 - MANAGE_EXTERNAL_STORAGE 0 0.00000000 146 1340.216
## 4 - MANAGE_OWN_CALLS 0 0.00000000 146 1340.216
## 5 - START_ACTIVITIES_FROM_BACKGROUND 0 0.00000000 146 1340.216
## 6 - SDCARD_WRITE 0 0.00000000 146 1340.216
## 7 - BIND_DEVICE_ADMIN 0 0.00000000 146 1340.216
## 8 - USES_POLICY_WIPE_DATA 0 0.00000000 146 1340.216
## 9 - READ_APP_BADGE 0 0.00000000 146 1340.216
## 10 - BROADCAST_PACKAGE_ADDED 0 0.00000000 146 1340.216
## 11 - FLASHLIGHT 0 0.00000000 146 1340.216
## 12 - BROADCAST_PACKAGE_INSTALL 0 0.00000000 146 1340.216
## 13 - BROADCAST_PACKAGE_REPLACED 0 0.00000000 146 1340.216
## 14 - BROADCAST_PACKAGE_CHANGED 0 0.00000000 146 1340.216
## 15 - SET_WALLPAPER_HINTS 0 0.00000000 146 1340.216
## 16 - CHANGE_CONFIGURATION 0 0.00000000 146 1340.216
## 17 - RESTART_PACKAGES 0 0.00000000 146 1340.216
## 18 - DELETE_PACKAGES 0 0.00000000 146 1340.216
## 19 - INSTALL_PACKAGES 0 0.00000000 146 1340.216
## 20 - SET_WALLPAPER 0 0.00000000 146 1340.216
## 21 - USER_PRESENT 0 0.00000000 146 1340.216
## 22 - CAMERA 0 0.00000000 146 1340.216
## 23 - ACCESS_NOTIFICATION_POLICY 0 0.00000000 146 1340.216
## 24 - ACCESS_SUPERUSER 0 0.00000000 146 1340.216
## 25 - INJECT_EVENTS 0 0.00000000 146 1340.216
## 26 - WRITE_CALENDAR 0 0.00000000 146 1340.216
## 27 - READ_CALENDAR 0 0.00000000 146 1340.216
## 28 - READ_PRIVILEGED_PHONE_STATE 0 0.00000000 146 1340.216
## 29 - READ_LOGS 0 0.00000000 146 1340.216
## 30 - WRITE_INTERNAL_STORAGE 0 0.00000000 146 1340.216
## 31 - PACKAGE_USAGE_STATS 0 0.00000000 146 1340.216
## 32 - WRITE_SECURE_SETTINGS 0 0.00000000 146 1340.216
## 33 - SAMSUNG_TUNTAP 0 0.00000000 146 1340.216
## 34 - WRITE_SETTINGS 0 0.00000000 146 1340.216
## 35 - DIAGNOSTIC 0 0.00000000 146 1340.216
## 36 - ACCESS_MTK_MMHW 0 0.00000000 146 1340.216
## 37 - ACCESS_CACHE_FILESYSTEM 0 0.00000000 146 1340.216
## 38 - DOWNLOAD_WITHOUT_NOTIFICATION 0 0.00000000 146 1340.216
## 39 - READ_INTERNAL_STORAGE 0 0.00000000 146 1340.216
## 40 - CHANGE_WIFI_MULTICAST_STATE 0 0.00000000 146 1340.216
## 41 - INTERACT_ACROSS_USERS_FULL 0 0.00000000 146 1340.216
## 42 - READ_USER_DICTIONARY 0 0.00000000 146 1340.216
## 43 - ACCESS_MOCK_LOCATION 0 0.00000000 146 1340.216
## 44 - DISABLE_KEYGUARD 0 0.00000000 146 1340.216
## 45 - POWER_SERVICE 0 0.00000000 146 1340.216
## 46 - ANSWER_PHONE_CALLS 0 0.00000000 146 1340.216
## 47 - MOUNT_UNMOUNT_FILESYSTEMS 0 0.00000000 146 1340.216
## 48 - RECORD_AUDIO 0 0.00000000 146 1340.216
## 49 - MODIFY_AUDIO_SETTINGS 0 0.00000000 146 1340.216
## 50 - BROADCAST_STICKY 0 0.00000000 146 1340.216
## 51 - WRITE_CONTACTS 0 0.00000000 146 1340.216
## 52 - ACCESS_BACKGROUND_LOCATION 0 0.00000000 146 1340.216
## 53 - READ_CALL_LOG 0 0.00000000 146 1340.216
## 54 - WRITE_CALL_LOG 0 0.00000000 146 1340.216
## 55 - ACCESS_LOCATION_EXTRA_COMMANDS 0 0.00000000 146 1340.216
## 56 - RECEIVE_USER_PRESENT 0 0.00000000 146 1340.216
## 57 - BOOT_COMPLETED 0 0.00000000 146 1340.216
## 58 - BLUETOOTH_ADMIN 0 0.00000000 146 1340.216
## 59 - PROCESS_OUTGOING_CALLS 0 0.00000000 146 1340.216
## 60 - ACCESS_COARSE_UPDATES 0 0.00000000 146 1340.216
## 61 - WRITE_SYNC_SETTINGS 0 0.00000000 146 1340.216
## 62 - CHANGE_NETWORK_STATE 1 0.08310525 147 1340.299
## 63 - READ_SMS 1 0.03843852 148 1340.337
## 64 - READ_CONTACTS 1 0.58145426 149 1340.919
## 65 - SYSTEM_ALERT_WINDOW 1 1.29585809 150 1342.214
## 66 - READ_PHONE_NUMBERS 1 1.18654652 151 1343.401
## 67 - CALL_PHONE 1 4.30800958 152 1347.709
## 68 - WRITE_EXTERNAL_STORAGE 1 7.72828134 153 1355.437
## 69 - CHANGE_WIFI_STATE 1 8.87030329 154 1364.308
## 70 - VIBRATE 1 3.31596078 155 1367.624
## 71 - ACCESS_COARSE_LOCATION 1 13.95012507 156 1381.574
## 72 - ACCESS_NETWORK_STATE 1 10.84410359 157 1392.418
## 73 - RECEIVE_BOOT_COMPLETED 1 13.72871214 158 1406.147
## 74 + READ_CALENDAR 1 17.24512352 157 1388.901
## 75 - REQUEST_IGNORE_BATTERY_OPTIMIZATIONS 1 9.13624727 158 1398.038
## 76 - READ_EXTERNAL_STORAGE 1 7.98060122 159 1406.018
## 77 + WRITE_CONTACTS 1 15.66279105 158 1390.355
## 78 - REQUEST_DELETE_PACKAGES 1 0.23330862 159 1390.589
## AIC
## 1 438.3712
## 2 438.3712
## 3 438.3712
## 4 438.3712
## 5 438.3712
## 6 438.3712
## 7 438.3712
## 8 438.3712
## 9 438.3712
## 10 438.3712
## 11 438.3712
## 12 438.3712
## 13 438.3712
## 14 438.3712
## 15 438.3712
## 16 438.3712
## 17 438.3712
## 18 438.3712
## 19 438.3712
## 20 438.3712
## 21 438.3712
## 22 438.3712
## 23 438.3712
## 24 438.3712
## 25 438.3712
## 26 438.3712
## 27 438.3712
## 28 438.3712
## 29 438.3712
## 30 438.3712
## 31 438.3712
## 32 438.3712
## 33 438.3712
## 34 438.3712
## 35 438.3712
## 36 438.3712
## 37 438.3712
## 38 438.3712
## 39 438.3712
## 40 438.3712
## 41 438.3712
## 42 438.3712
## 43 438.3712
## 44 438.3712
## 45 438.3712
## 46 438.3712
## 47 438.3712
## 48 438.3712
## 49 438.3712
## 50 438.3712
## 51 438.3712
## 52 438.3712
## 53 438.3712
## 54 438.3712
## 55 438.3712
## 56 438.3712
## 57 438.3712
## 58 438.3712
## 59 438.3712
## 60 438.3712
## 61 438.3712
## 62 436.3826
## 63 434.3878
## 64 432.4672
## 65 430.6440
## 66 428.8057
## 67 427.3916
## 68 426.4380
## 69 425.6317
## 70 424.0759
## 71 423.9331
## 72 423.3639
## 73 423.1593
## 74 422.9011
## 75 422.1010
## 76 421.1427
## 77 421.0926
## 78 419.1233
# Compare the full model against the stepwise-selected model.
anova(permisos.regresion,permisos.regresion_filtrados)
## Analysis of Variance Table
##
## Model 1: positives ~ (sha256 + total + SYSTEM_ALERT_WINDOW + ACCESS_NETWORK_STATE +
## ACCESS_COARSE_LOCATION + WAKE_LOCK + INTERNET + WRITE_EXTERNAL_STORAGE +
## RECEIVE_BOOT_COMPLETED + QUICKBOOT_POWERON + ACCESS_WIFI_STATE +
## GET_TASKS + CHANGE_WIFI_STATE + READ_PHONE_STATE + BLUETOOTH +
## REQUEST_IGNORE_BATTERY_OPTIMIZATIONS + REQUEST_DELETE_PACKAGES +
## QUERY_ALL_PACKAGES + SEND_SMS + KILL_BACKGROUND_PROCESSES +
## WRITE_SMS + CALL_PHONE + VIBRATE + RECEIVE_SMS + READ_CONTACTS +
## FOREGROUND_SERVICE + READ_SMS + READ_PHONE_NUMBERS + GET_ACCOUNTS +
## AUTHENTICATE_ACCOUNTS + RECEIVE_MMS + REORDER_TASKS + WRITE_SYNC_SETTINGS +
## USE_FULL_SCREEN_INTENT + CHANGE_NETWORK_STATE + ACCESS_COARSE_UPDATES +
## PROCESS_OUTGOING_CALLS + BLUETOOTH_ADMIN + ACCESS_FINE_LOCATION +
## BOOT_COMPLETED + REQUEST_INSTALL_PACKAGES + RECEIVE_USER_PRESENT +
## ACCESS_LOCATION_EXTRA_COMMANDS + WRITE_CALL_LOG + READ_CALL_LOG +
## ACCESS_BACKGROUND_LOCATION + WRITE_CONTACTS + READ_EXTERNAL_STORAGE +
## BROADCAST_STICKY + MODIFY_AUDIO_SETTINGS + SYSTEM_OVERLAY_WINDOW +
## RECORD_AUDIO + MOUNT_UNMOUNT_FILESYSTEMS + ANSWER_PHONE_CALLS +
## POWER_SERVICE + DISABLE_KEYGUARD + ACCESS_MOCK_LOCATION +
## READ_USER_DICTIONARY + INTERACT_ACROSS_USERS_FULL + CHANGE_WIFI_MULTICAST_STATE +
## READ_INTERNAL_STORAGE + DOWNLOAD_WITHOUT_NOTIFICATION + ACCESS_CACHE_FILESYSTEM +
## ACCESS_MTK_MMHW + DIAGNOSTIC + WRITE_SETTINGS + SAMSUNG_TUNTAP +
## WRITE_SECURE_SETTINGS + PACKAGE_USAGE_STATS + WRITE_INTERNAL_STORAGE +
## READ_LOGS + READ_PRIVILEGED_PHONE_STATE + READ_CALENDAR +
## WRITE_CALENDAR + INJECT_EVENTS + ACCESS_SUPERUSER + ACCESS_NOTIFICATION_POLICY +
## CAMERA + USER_PRESENT + SET_WALLPAPER + INSTALL_PACKAGES +
## DELETE_PACKAGES + RESTART_PACKAGES + CHANGE_CONFIGURATION +
## SET_WALLPAPER_HINTS + BROADCAST_PACKAGE_CHANGED + BROADCAST_PACKAGE_REPLACED +
## BROADCAST_PACKAGE_INSTALL + FLASHLIGHT + BROADCAST_PACKAGE_ADDED +
## READ_APP_BADGE + USES_POLICY_WIPE_DATA + BIND_DEVICE_ADMIN +
## SDCARD_WRITE + START_ACTIVITIES_FROM_BACKGROUND + MANAGE_OWN_CALLS +
## MANAGE_EXTERNAL_STORAGE + BIND_CALL_REDIRECTION_SERVICE) -
## total - sha256
## Model 2: positives ~ WAKE_LOCK + INTERNET + QUICKBOOT_POWERON + ACCESS_WIFI_STATE +
## GET_TASKS + READ_PHONE_STATE + BLUETOOTH + QUERY_ALL_PACKAGES +
## SEND_SMS + KILL_BACKGROUND_PROCESSES + WRITE_SMS + RECEIVE_SMS +
## FOREGROUND_SERVICE + GET_ACCOUNTS + AUTHENTICATE_ACCOUNTS +
## RECEIVE_MMS + REORDER_TASKS + USE_FULL_SCREEN_INTENT + ACCESS_FINE_LOCATION +
## REQUEST_INSTALL_PACKAGES + SYSTEM_OVERLAY_WINDOW + READ_CALENDAR +
## WRITE_CONTACTS
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 146 1340.2
## 2 159 1390.6 -13 -50.373 0.4221 0.9599
# Coefficient table and goodness-of-fit figures for the stepwise-selected model.
summary(permisos.regresion_filtrados)
##
## Call:
## lm(formula = positives ~ WAKE_LOCK + INTERNET + QUICKBOOT_POWERON +
## ACCESS_WIFI_STATE + GET_TASKS + READ_PHONE_STATE + BLUETOOTH +
## QUERY_ALL_PACKAGES + SEND_SMS + KILL_BACKGROUND_PROCESSES +
## WRITE_SMS + RECEIVE_SMS + FOREGROUND_SERVICE + GET_ACCOUNTS +
## AUTHENTICATE_ACCOUNTS + RECEIVE_MMS + REORDER_TASKS + USE_FULL_SCREEN_INTENT +
## ACCESS_FINE_LOCATION + REQUEST_INSTALL_PACKAGES + SYSTEM_OVERLAY_WINDOW +
## READ_CALENDAR + WRITE_CONTACTS, data = sha_datos_permisos_positives)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0328 -1.1113 0.1194 1.4543 5.9672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.0328 0.3786 58.188 < 2e-16 ***
## WAKE_LOCK -6.6217 1.4399 -4.599 8.63e-06 ***
## INTERNET 3.9830 1.4177 2.810 0.005584 **
## QUICKBOOT_POWERON 6.2217 2.3721 2.623 0.009568 **
## ACCESS_WIFI_STATE 6.4581 1.5943 4.051 7.97e-05 ***
## GET_TASKS 8.5644 2.1436 3.995 9.86e-05 ***
## READ_PHONE_STATE -8.9045 1.8010 -4.944 1.92e-06 ***
## BLUETOOTH -11.7024 2.2404 -5.223 5.44e-07 ***
## QUERY_ALL_PACKAGES 16.6052 5.4534 3.045 0.002725 **
## SEND_SMS 13.7427 2.4937 5.511 1.41e-07 ***
## KILL_BACKGROUND_PROCESSES -12.0794 4.2082 -2.870 0.004657 **
## WRITE_SMS -19.5729 4.6649 -4.196 4.51e-05 ***
## RECEIVE_SMS 16.4283 3.1045 5.292 3.96e-07 ***
## FOREGROUND_SERVICE -7.8335 1.6325 -4.799 3.66e-06 ***
## GET_ACCOUNTS 34.9528 5.8646 5.960 1.57e-08 ***
## AUTHENTICATE_ACCOUNTS -74.0444 10.5581 -7.013 6.31e-11 ***
## RECEIVE_MMS 25.6096 4.2729 5.993 1.33e-08 ***
## REORDER_TASKS -8.7765 4.2649 -2.058 0.041237 *
## USE_FULL_SCREEN_INTENT 25.5969 4.8965 5.228 5.33e-07 ***
## ACCESS_FINE_LOCATION -9.9183 2.2945 -4.323 2.71e-05 ***
## REQUEST_INSTALL_PACKAGES -9.6973 2.8780 -3.369 0.000945 ***
## SYSTEM_OVERLAY_WINDOW 21.6861 4.2880 5.057 1.16e-06 ***
## READ_CALENDAR 11.6998 2.8949 4.042 8.25e-05 ***
## WRITE_CONTACTS -7.8915 3.8798 -2.034 0.043615 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.957 on 159 degrees of freedom
## Multiple R-squared: 0.6336, Adjusted R-squared: 0.5806
## F-statistic: 11.95 on 23 and 159 DF, p-value: < 2.2e-16
En un dataset como este, donde queremos predecir si será positivo en función de los permisos, la mejor opción sería aplicar reglas de asociación, pero no tengo suficiente memoria RAM en el ordenador: ni siquiera con minlen=2 y support=0.8 deja de fallar.
# Attempt to mine association rules from the whole table with minlen = 2 and
# support = 0.8.
# NOTE(review): the input still contains sha256, total and positives, and the
# permission columns are 0/1 numbers rather than logicals - presumably this
# inflates the item set and may be behind the memory failures described in the
# text; confirm against the arules documentation before relying on the result.
test_apriori <- apriori(sha_datos_permisos_positives, minlen=2, support=0.8)
# Interactive heatmap of the transposed incidence matrix of a formal context.
#
# fca: object whose `I` element can be transposed and converted to a matrix
#      (the script passes the context built with FormalContext$new).
# Returns the plotly object; axis labels come from the matrix dimnames.
plot_interactivo <- function(fca) {
  incidencia <- as.matrix(t(fca[["I"]]))
  plot_ly(
    z = incidencia,
    data = as.data.frame(incidencia),
    type = "heatmap",
    colors = "Greys",
    x = colnames(incidencia),
    y = rownames(incidencia)
  ) %>%
    layout(
      xaxis = list(autotypenumbers = "strict", type = "category"),
      yaxis = list(autotypenumbers = "strict", dtick = 1)
    )
}
# Static black-and-white heatmap (with dendrograms) of the transposed
# incidence matrix of a formal context.
#
# fca: object whose `I` element can be transposed and converted to a matrix.
# Returns whatever heatmap() returns.
plot_dendograma <- function(fca) {
  incidencia <- t(fca[["I"]])
  heatmap(as.matrix(incidencia), col = c("White", "Black"))
}
# Interactive counterpart of the static dendrogram heatmap, drawn with
# heatmaply on the transposed incidence matrix of a formal context.
#
# fca: object whose `I` element can be transposed and converted to a matrix.
# Returns whatever heatmaply() returns.
plot_dendograma_interactico <- function(fca) {
  incidencia <- as.matrix(t(fca[["I"]]))
  heatmaply(incidencia, col = c("White", "Black"))
}
Finalmente hacemos un FCA para ver cómo se agrupan los permisos, por si algunos son más comunes que otros. Hay 46 grupos de permisos que son irreducibles y 31 grupos de archivos distintos.
# Formal concept analysis over the permission columns only.
# The permission columns are selected by name (everything except the id and
# the two counters) instead of the fixed range 4:99, so the block keeps
# working if the number of permissions in the data changes.
columnas_permisos <- setdiff(
  names(sha_datos_permisos_positives),
  c("sha256", "total", "positives")
)
fc_permisos <- FormalContext$new(
  sha_datos_permisos_positives[, columnas_permisos, drop = FALSE]
)
# Simplify the context before computing concepts and implications.
fc_permisos$clarify()
fc_permisos$reduce()
fc_permisos$find_concepts()
fc_permisos$find_implications()
# The value of standardize() is not assigned; it is only printed.
fc_permisos$standardize()
## FormalContext with 31 objects and 46 attributes.
## M1 M2 M3 M4 M5 M6 M7 M8 M9 M10 M11 M12 M13 M14 M15 M16 M17 M18 M19 M20 M21
## J1 X X X X X X
## J2 X X X X X
## J3 X X X X X
## J4 X X X X X X X X X X X X X X X
## J5 X X X X X X
## J6 X X X X X
## J7 X X X X X X X X
## J8 X X X X X X X
## J9 X X X X X X X X X X X X X X X X
## J10 X X X X X X X X X X X X X
## Other attributes are: M22, M23, M24, M25, M26, M27, ...
# Plot the concepts computed for the context.
fc_permisos$concepts$plot()
# Interactive heatmap of the context; printing the object displays it.
mapa_calor <- plot_interactivo(fc_permisos)
mapa_calor
# Static heatmap with dendrograms of the same context.
plot_dendograma(fc_permisos )
Ahora realizamos un árbol de decisión con un porcentaje del 80% de los datos para entrenamiento y 20% para prueba.
# 80/20 train/test split followed by a regression tree on the training part.
#
# sample.split() expects the vector of outcomes. Given the whole data frame it
# sizes the split by length(), i.e. the number of columns, and subset() then
# recycles that short vector over the rows. Passing the response column yields
# one flag per row.
# No seed is set in this block, so the split changes between runs unless one
# was set earlier in the script.
sha_datos_permisos_positives.split <- sample.split(
  sha_datos_permisos_positives$positives,
  SplitRatio = 0.8
)
sha_datos_permisos_positives.train <- subset(
  sha_datos_permisos_positives,
  sha_datos_permisos_positives.split
)
sha_datos_permisos_positives.test_train <- subset(
  sha_datos_permisos_positives,
  !sha_datos_permisos_positives.split
)

# Tree of `positives` on every permission; `total` and `sha256` are excluded.
permisos.tree <- tree(positives ~ . -total -sha256, data = sha_datos_permisos_positives.train )
plot(permisos.tree)
text(permisos.tree , pretty = 0)

# Predictions for the held-out rows.
permisos.predecir <- predict(permisos.tree, sha_datos_permisos_positives.test_train )

# Cross-validated deviance for each candidate tree size.
permisos.cv <- cv.tree(permisos.tree)
permisos.cv
## $size
## [1] 7 6 5 4 3 2 1
##
## $dev
## [1] 2480.654 2468.179 2464.927 2403.940 2513.106 2617.395 2858.442
##
## $k
## [1] -Inf 36.60952 50.11179 65.81134 190.74220 212.85556 311.35262
##
## $method
## [1] "deviance"
##
## attr(,"class")
## [1] "prune" "tree.sequence"
# Deviance against tree size from the cross-validation.
plot(permisos.cv)
# Two rpart fits on the same training data and formula: one treating the
# response as a set of classes, one as a numeric outcome.
# NOTE(review): `positives` is numeric, so method = 'class' makes every
# distinct value its own class (the warning below reports 19) - confirm this
# is intended.
permisos.rpart.class <- rpart(positives ~ . -total -sha256, data = sha_datos_permisos_positives.train, method = 'class')
permisos.rpart.anova <- rpart(positives ~ . -total -sha256, data = sha_datos_permisos_positives.train, method = 'anova')
# Draw the classification tree.
rpart.plot(permisos.rpart.class, extra = 100)
## Warning: All boxes will be white (the box.palette argument will be ignored) because
## the number of classes in the response 19 is greater than length(box.palette) 6.
## To silence this warning use box.palette=0 or trace=-1.
# Draw the regression (anova) tree.
rpart.plot(permisos.rpart.anova, extra = 100)
## Análisis de factores

Realizo un análisis de factores para intentar ver si con pocos factores se
puede explicar el dataframe y trabajar con ellos para una visualización; sin
embargo, para explicar un 90% de la varianza hacen falta 20 factores y con 2
sólo se explica un 50%.
# Unscaled PCA over the permission columns only.
# Columns are selected by name (everything except the id and the two
# counters) and the number of retained components is derived from the column
# count, replacing the fixed 4:99 range and ncp = 95 of the 96-permission data.
columnas_permisos <- setdiff(
  names(sha_datos_permisos_positives),
  c("sha256", "total", "positives")
)
pr_permisos <- PCA(
  sha_datos_permisos_positives[, columnas_permisos, drop = FALSE],
  scale.unit = FALSE,
  ncp = max(1L, length(columnas_permisos) - 1L),
  graph = TRUE
)
# Overview of the results available for the variables.
get_pca(pr_permisos)
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Variable-level PCA results (coordinates, correlations, cos2, contributions).
get_pca_var(pr_permisos)
## Principal Component Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the variables"
## 2 "$cor" "Correlations between variables and dimensions"
## 3 "$cos2" "Cos2 for the variables"
## 4 "$contrib" "contributions of the variables"
# Individual-level PCA results (coordinates, cos2, contributions).
get_pca_ind(pr_permisos)
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
# Plot of the eigenvalues / explained variance per component.
fviz_eig(pr_permisos)
# NOTE(review): fviz_screeplot is documented as an alias of fviz_eig, so this
# presumably draws the same plot a second time - confirm.
fviz_screeplot(pr_permisos)
# Contribution of each variable, converted to an interactive plotly chart.
ggplotly(fviz_contrib(pr_permisos, choice = "var"))
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Bibliografía

- https://www.alexejgossmann.com/benchmarking_r/
- https://cran.r-project.org/web/packages/microbenchmark/microbenchmark.pdf
- https://rpubs.com/rdelgado/405322
- https://techvidvan.com/tutorials/decision-tree-in-r/#:~:text=Decision%20trees%20are%20a%20graphical,as%20well%20as%20classification%20problems.
- https://www.guru99.com/r-decision-trees.html
- https://plotly.com/r/ml-regression/
- https://www.statmethods.net/stats/regression.html